################################################################################
# Joshua C. Fjelstul, Ph.D.
# "How the Chamber System at the CJEU Undermines the Consistency of the 
# Court's Application of EU Law"
# Journal of Law and Courts
# replication code for the random forest analysis
################################################################################

# install ggminimal package (a ggplot theme)
# devtools::install_github("jfjelstul/ggminimal")

# libraries
library(tidyverse)
library(lubridate)
library(caret)
library(randomForest)
library(ggplot2)
library(ggminimal)
library(patchwork)

##################################################
# data for model
##################################################

# read in the judge-level panel assignments (provides `assignments`)
load("replication-materials/assignments.RData")

# derive the modeling variables in one step:
#   variable_names -- judge name with spaces and apostrophes replaced by "_"
#   on_panel       -- constant 1, marks that the judge appears in the row
#   disposition    -- outcome factor built from `successful`
#                     (0 = "member_state", 1 = "commission")
model_data <- mutate(
  assignments,
  variable_names = str_replace_all(str_replace_all(judge, " ", "_"), "'", "_"),
  on_panel = 1,
  disposition = factor(
    successful,
    levels = c(0, 1),
    labels = c("member_state", "commission")
  )
)

# reshape to one row per judgment with one indicator column per judge
model_data <- model_data %>%
  pivot_wider(
    id_cols = c(ecli, disposition),
    names_from = variable_names,
    values_from = on_panel
  )

# cells left empty by the reshape become 0
model_data[is.na(model_data)] <- 0

# the judgment ID is not a predictor, so remove it
model_data <- model_data %>%
  select(-ecli)

# write the model data to disk
save(model_data, file = "replication-materials/model_data.RData")

##################################################
# train model
##################################################

# load data
load("replication-materials/model_data.RData")

# set seed
set.seed(12345)

# indexes for the training set
# NOTE(review): train_data and test_data are created here but never used
# below -- the model is fit on the full model_data and evaluated with
# out-of-bag error. Confirm this is intended before relying on the split.
train_index <- createDataPartition(model_data$disposition, p = 0.7, list = FALSE)
train_data <- model_data[train_index, ]
test_data <- model_data[-train_index, ]

# training parameters: out-of-bag error for tuning, with up-sampling
# NOTE(review): `number` is presumably ignored when method = "oob" -- confirm
control <- trainControl(
  method = "oob",
  number = 10,
  sampling = "up",
  verboseIter = TRUE
)

# set seed (reset so the fit does not depend on the partition draw above)
set.seed(12345)

# train the model, tuning mtry over 10, 20, ..., 90
# The number of trees must be passed as `ntree`: train() forwards extra
# arguments to randomForest(), which has no `trees` argument, so `trees = 1000`
# was silently absorbed by `...` and the default of 500 trees was used.
trained_model <- train(
  form = disposition ~ ., 
  data = model_data,
  method = "rf",
  ntree = 1000,
  tuneGrid = data.frame(mtry = seq(10, 90, 10)),
  trControl = control
)

# save the trained model
save(trained_model, file = "replication-materials/trained_model.RData")

##################################################
# performance metrics
##################################################

# load the trained model (same path it is saved to in the training section)
load("replication-materials/trained_model.RData")

# create a confusion matrix from the out-of-bag predictions of the final model
# randomForest stores observed classes in rows and predicted classes in
# columns, followed by a trailing class-error column; keep only the square
# class-by-class part
oob_confusion <- trained_model$finalModel$confusion
confusion_matrix <- oob_confusion[, seq_len(nrow(oob_confusion))]

# caret::confusionMatrix() expects predictions in rows and the reference
# (observed) classes in columns, so transpose; without this, precision and
# recall are swapped
confusion_matrix <- t(confusion_matrix)

# metrics if member state is the target
confusionMatrix(
  data = confusion_matrix,
  mode = "prec_recall",
  positive = "member_state"
)

# metrics if Commission is the target
confusionMatrix(
  data = confusion_matrix,
  mode = "prec_recall", 
  positive = "commission"
)

##################################################
# counterfactual simulations
##################################################

# load data
load("replication-materials/judges.RData")
load("replication-materials/model_data.RData")
load("replication-materials/trained_model.RData")

# make a list of current judges (term ending in 2018)
# NOTE(review): Vajda is dropped from the pool; the reason is not visible in
# this script -- confirm against the paper
current_judges <- judges$judge[judges$end_year == 2018]
current_judges <- current_judges[current_judges != "Vajda"]

# clean the names exactly as the model's column names were cleaned
# (spaces AND apostrophes become underscores) so that every current judge
# can be matched to a predictor column
current_judges <- str_replace_all(current_judges, " ", "_")
current_judges <- str_replace_all(current_judges, "'", "_")

# make a list of all judges (every column of model_data except the outcome)
all_judges <- names(model_data)[-1]
current_indexes <- which(all_judges %in% current_judges)

# Simulate random panels.
#
# Each simulated panel has 3 or 5 members (chosen at random) drawn without
# replacement from `pool`.
#
# pool        integer vector of column positions in `judge_names` that are
#             eligible to sit on a panel
# judge_names character vector of predictor column names
# n_sims      number of panels to simulate
#
# Returns a data frame with `n_sims` rows and one 0/1 column per judge.
simulate_panels <- function(pool, judge_names, n_sims = 10000) {
  # sample() treats a length-one numeric as 1:x, and a panel needs up to 5
  # distinct members, so require a pool of at least 5
  stopifnot(length(pool) >= 5, all(pool %in% seq_along(judge_names)))
  sims <- matrix(
    0,
    nrow = n_sims,
    ncol = length(judge_names),
    dimnames = list(NULL, judge_names)
  )
  for (i in seq_len(n_sims)) {
    # draw the panel size first, then the members, one panel at a time,
    # so the sequence of random draws is fixed for a given seed
    panel_size <- sample(c(3, 5), size = 1)
    members <- sample(pool, size = panel_size, replace = FALSE)
    sims[i, members] <- 1
  }
  as.data.frame(sims)
}

# Predicted probability of the "commission" outcome for each simulated panel
predict_commission <- function(model, panels) {
  probs <- predict(model, panels, type = "prob")
  as.numeric(probs[["commission"]])
}

# set the seed
set.seed(12345)

# run the simulation: panels drawn from all judges in the data
sim_1 <- simulate_panels(seq_along(all_judges), all_judges)

# calculate the predicted probabilities
probs_1 <- predict_commission(trained_model, sim_1)

# set the seed
set.seed(12345)

# run the simulation: panels drawn from current judges only
sim_2 <- simulate_panels(current_indexes, all_judges)

# calculate the predicted probabilities
probs_2 <- predict_commission(trained_model, sim_2)

# calculate the inter-quartile range for each distribution
IQR(probs_1)
IQR(probs_2)

##################################################
# Figure 6, Panel A
##################################################

# shared builder for the two panels: a shaded density of the predicted
# probabilities with a dashed vertical line at their mean, and no y-axis
# ticks or tick labels
make_density_panel <- function(probs, panel_title) {
  ggplot() +
    geom_density(
      aes(x = probs),
      adjust = 1,
      color = "black",
      fill = "black",
      alpha = 0.1
    ) +
    geom_vline(xintercept = mean(probs), linetype = "dashed") +
    theme_minimal() +
    theme(
      axis.ticks.y = element_blank(),
      axis.text.y = element_blank()
    ) +
    titles_minimal(
      title = panel_title,
      y = "Density",
      x = "Probability that the Commission wins"
    )
}

# Panel A: distribution of probs_1
panel_a <- make_density_panel(probs_1, "Panel A")

##################################################
# Figure 6, Panel B
##################################################

# Panel B: distribution of probs_2
panel_b <- make_density_panel(probs_2, "Panel B")

##################################################
# Figure 6
##################################################

# place the two panels side by side
figure_6 <- panel_a + panel_b + plot_layout(nrow = 1, ncol = 2)

# write the figure to a PDF
ggsave(
  filename = "replication-materials/figure-6.pdf",
  plot = figure_6,
  device = "pdf",
  width = 10,
  height = 6,
  scale = 1.25
)

################################################################################
# end R script
################################################################################
